#Alexander F. Gazmararian
#afg2@princeton.edu
#January 9, 2024

#Purpose: Clean coal price data for subsequent analyses

#Load packages
library(tidyverse)
library(tidylog)
library(here)

#load 1949-2011 data
eia79 <- read.csv(here("data", "input", "eia_7.9coalprices/stb0709.csv"), skip = 3)
#select relevant columns
eia79 <- eia79[, -c(3,5,7,9,11,13,15,17,19,21)]
#adjust names
names(eia79) <- c("year", "bit_n", "bit_r", "subbit_n", "subbit_r", "lig_n", "lig_r", "ant_n", "ant_r", "coal_n", "coal_r")
eia79 <- subset(eia79, select=c(year,coal_r))
eia79 <- na.omit(eia79)
eia79$year <- as.numeric(eia79$year)
names(eia79)[2] <- "coal_eia79"
#load 2008-2020 data
eia7a <- read.csv(here("data", "input", "eia_7acoalprices", "Market_average_price.csv"),skip=4)
eia7a <- subset(eia7a, description %in% c("United States : captive", "United States : open market"))
eia7a <- eia7a %>%
  mutate(across(X2001:X2021, ~ as.numeric(.x))) %>%
  pivot_longer(cols=c(X2001:X2021),names_to="year",values_to="coal_eia7a") %>%
  group_by(year) %>%
  summarize(coal_eia7a=mean(coal_eia7a)) %>%
  mutate(year=as.numeric(gsub("X","",year)))
#bind coal together
coal <- full_join(eia79, eia7a, by = "year")
#Create figure to diagnose correspondence
p.coal <- coal %>%
  pivot_longer(cols=c(coal_eia79, coal_eia7a)) %>%
  mutate(name=ifelse(name=="coal_eia79", "AER Table 7.9", "Form EIA-7A")) %>%
  ggplot(aes(x=year,y=value,lty=name,color=name)) +
  geom_line(size=1) +
  labs(lty="",color="",x="Year",y="Average Coal Price ($ per Short Ton)") +
  scale_color_manual(values=c("black","grey")) +
  theme_bw(base_size=14) +
  scale_y_continuous(labels=scales::dollar,limits=c(0,60)) +
  scale_x_continuous(breaks=seq(1950,2020,10)) + 
  theme(
    panel.grid = element_blank(),
    legend.position = c(.85,.85)
  )
#Figure E6
ggsave(
  p.coal,
  filename = here("output", "figures", "si_fig_E6_coalprices.png"),
  width = 5.25,
  height = 3,
  scale = 1.5,
  dpi = 300
)
#check correlation between the two
cor(coal$coal_eia79,coal$coal_eia7a,use="complete")
cor.test(coal$coal_eia79,coal$coal_eia7a,use="complete")

#load gas data
gas <- read.csv(here("data", "input", "fuelprices", "Henry_Hub_Natural_Gas_Spot_Price.csv"))
gas$Henry.Hub.Natural.Gas.Spot.Price <- as.numeric(gas$Henry.Hub.Natural.Gas.Spot.Price)
gas <- na.omit(gas)
gas$year <- rownames(gas)
names(gas)[1] <- "gas_r"
gas <- as.data.frame(gas)
gas$year <- as.numeric(gas$year)
#prices together
fuelprices <- full_join(gas, coal, by = "year")
fuelprices$coal_r <- c(fuelprices$coal_eia7a[c(1:21)], fuelprices$coal_eia79[c(22:73)])
fuelprices <- subset(fuelprices, select = c(gas_r, coal_r, year), year >= 1997)
fuelprices$coalgas_ratio_z <- with(fuelprices, scale(coal_r / gas_r)[,1])
p.std <- fuelprices %>%
  pivot_longer(cols=c(coal_r,gas_r)) %>%
  group_by(name) %>%
  mutate(value = scale(value)[,1],
         name = ifelse(name=="coal_r","Coal","Gas")) %>%
  ggplot() +
  geom_vline(xintercept=2008, color = "red") +
  geom_line(aes(x=year,y=value,lty=name,color=name)) +
  scale_color_grey() +
  labs(x="Year",y="Standardize Price",title="Standardized Prices",lty="",color="") +
  theme_bw(base_size = 14) +
  theme(
    panel.grid = element_blank(),
    legend.position=c(.85,.85),
    legend.background = element_rect(fill='transparent'),
    legend.box.background = element_rect(fill='transparent',color="transparent"),
    plot.title = element_text(hjust=.5)
  )
p.abs <- fuelprices %>%
  ggplot() +
  geom_vline(xintercept=2008, color = "red") +
  geom_line(aes(x=year,y=coalgas_ratio_z)) +
  labs(x="Year",y="Coal-to-Gas Price Ratio",title="Real Price Ratio (Standardized)",lty="",color="") +
  theme_bw(base_size = 14) +
  theme(
    panel.grid = element_blank(),
    legend.position=c(.85,.85),
    legend.background = element_rect(fill='transparent'),
    legend.box.background = element_rect(fill='transparent',color="transparent"),
    plot.title = element_text(hjust=.5)
  )
#Figure E7
p.out <- gridExtra::grid.arrange(p.abs,p.std,ncol=2)
ggsave(
  p.out,
  filename = here("output", "figures", "si_fig_E7_coal_ratio.png"),
  dpi = 300,
  width = 6.5,
  height = 2.8,
  scale = 1.5
)
#save data for analysis
saveRDS(fuelprices, here("data", "inter", "fuelprices.rds"))
